Querying the DHS API in R
Call the DHS application program interface (API) from R
Walkthrough
Load Packages
# Echo the source of every chunk in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Packages used in this walkthrough.
library(data.table)
library(jsonlite) # fromJSON() for calling the API
library(dplyr)    # %>%, filter(), select()
library(ggplot2)  # plotting
library(DT)       # datatable() for the interactive table
Set up query
# Abbreviated country code, check list here:
# https://dhsprogram.com/data/File-Types-and-Names.cfm#CP_JUMP_10136
countryIds <- c("BF") # countryIDs in quotes, separated by commas if multiple

# Level of stratification ('national' or 'subnational')
breakdown <- c("subnational")

# Indicator(s) of interest. For a full list of 3,785 DHS indicators, look here:
# https://api.dhsprogram.com/rest/dhs/indicators?returnFields=IndicatorId,Label,Definition&f=html
#
# Example malaria indicators:
# - CH_FEVR_C_FEV = % of children U5 (or U3) with fever in the 2 weeks preceding the survey
# - ML_FEVT_C_ADV = % of children U5 with recent (<2wk) fever for whom advice/treatment was sought
# - ML_NETP_H_ITN = % of households with at least 1 ITN
# - ML_IRSM_H_IRS = % of households with IRS in the last 12 months
# - ML_PMAL_C_RDT = Malaria prevalence among children age 6-59 months tested by RDT
# - ML_PMAL_C_RDL = Lower limit of 95% CI for ML_PMAL_C_RDT
# - ML_PMAL_C_RDU = Upper limit of 95% CI for ML_PMAL_C_RDT
#
# Note: some indicators may not apply to all countries/districts/surveys
indicatorIds <- c(
  "ML_IRSM_H_IRS", "ML_NETP_H_ITN", "ML_FEVT_C_ADV",
  "ML_PMAL_C_RDT", "ML_PMAL_C_RDL", "ML_PMAL_C_RDU"
)
Execute query & plot results
#### Build API Query ####
# The request URL is assembled from a fixed prefix, the user-selected
# parameters (countryIds, breakdown, indicatorIds) and a fixed suffix.
# Multiple countries/indicators are sent as comma-separated lists.
base_url <- "http://api.dhsprogram.com/rest/dhs/data?f=json&surveyid=all"
end_url <- "&lang=en&f=json" # reused by the later queries in this document
url <- paste0(
  base_url,
  "&countryIds=", paste(countryIds, collapse = ","),
  "&breakdown=", breakdown,
  "&indicatorIds=", paste(indicatorIds, collapse = ","),
  end_url
)

#### Call API ####
dhs_data <- fromJSON(url)  # get page
dhs_data <- dhs_data$Data  # save dataframe
#### Plot Results ###
# Trends in intervention indicators by country-region.
# The RDT prevalence indicators are excluded here; they are plotted separately.
dhs_data %>%
  filter(!IndicatorId %in% c("ML_PMAL_C_RDT", "ML_PMAL_C_RDL", "ML_PMAL_C_RDU")) %>%
  ggplot(aes(x = SurveyYear, y = Value, color = Indicator)) +
  geom_line(size = 1) +
  geom_point(size = 2, shape = 21, fill = "white") +
  # One panel per country-region combination.
  facet_wrap(~paste(CountryName, CharacteristicLabel, sep = "-"), ncol = 5) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.justification = c(0, 0),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.minor.x = element_blank()
  ) +
  scale_x_continuous(breaks = seq(2000, 2020, 1)) +
  guides(color = guide_legend(direction = "vertical")) +
  labs(color = NULL)
# RDT Prevalence trends by country-region (where available)
dhs_data %>%
  filter(IndicatorId %in% c("ML_PMAL_C_RDT", "ML_PMAL_C_RDL", "ML_PMAL_C_RDU")) %>%
  ggplot(aes(x = SurveyYear, y = Value)) +
  # Grouping by year joins the point estimate and the two CI limits of the
  # same survey year with a vertical segment.
  geom_line(aes(group = SurveyYear)) +
  # Trend line through the point estimates only.
  geom_line(
    data = dhs_data[dhs_data$IndicatorId == "ML_PMAL_C_RDT", ],
    size = 1
  ) +
  geom_point(size = 2, shape = 21, fill = "white") +
  # One panel per country-region combination.
  facet_wrap(~paste(CountryName, CharacteristicLabel, sep = "-"), ncol = 5) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.justification = c(0, 0),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    panel.grid.minor.x = element_blank()
  ) +
  scale_x_continuous(breaks = seq(2000, 2025, 1)) +
  guides(color = guide_legend(direction = "vertical")) +
  ylab("Malaria Prevalence by RDT (U5)") +
  labs(color = NULL)
Get list of available indicators
This section generates an interactive table of indicators at the subnational level for selected surveys/countries/indicators. As written, this code is useful for seeing which indicators were reported and when, for a given area.
This code is very slow, because it sends one API request for every survey–indicator combination!
##########################################################
### View sample of available data for select countries ###
##########################################################
# 1. Get list of surveys from selected countries
################################################
countryIds <- c("BF") # selected country

### Build Query ###
# end_url is the common query suffix defined when the first query was built.
base_url <- "http://api.dhsprogram.com/rest/dhs/surveys?f=json"
url <- paste0(
  base_url,
  "&countryIds=", paste(countryIds, collapse = ","),
  end_url
)

### Call API ###
surveys <- fromJSON(url)  # get page
surveys <- surveys$Data   # save dataframe

# Survey identifiers used to query the data endpoint survey by survey.
surveyIds <- surveys$SurveyId
# 2. Set list of indicators
###############################################
# You can use all of the malaria indicators below, or a subset, or create a list of any indicators (including non-malaria).
# The codes are grouped one indicator family per line.
malaria_indicators <- c(
  "ML_NETP_H_MOS", "ML_NETP_H_ITN", "ML_NETP_H_LLN", "ML_NETP_H_MNM", "ML_NETP_H_MNI", "ML_NETP_H_MNL", "ML_NETP_H_NUM", "ML_NETP_H_UNW", "ML_NETP_H_MS2", "ML_NETP_H_IT2", "ML_NETP_H_LL2", "ML_NETP_H_NM2", "ML_NETP_H_UN2",
  "ML_IRSM_H_IRS", "ML_IRSM_H_IIR", "ML_IRSM_H_I2I", "ML_IRSM_H_NUM", "ML_IRSM_H_UNW",
  "ML_ITNA_P_NUM", "ML_ITNA_P_UNW", "ML_ITNA_P_ACC",
  "ML_NETU_P_ANY", "ML_NETU_P_ITN", "ML_NETU_P_LLN", "ML_NETU_P_ITI", "ML_NETU_P_NUM", "ML_NETU_P_UNW", "ML_NETU_P_IT1", "ML_NETU_P_NM1", "ML_NETU_P_UN1",
  "ML_ITNU_N_ITN", "ML_ITNU_N_NUM", "ML_ITNU_N_UNW",
  "ML_NETC_C_ANY", "ML_NETC_C_ITN", "ML_NETC_C_LLN", "ML_NETC_C_ITI", "ML_NETC_C_NUM", "ML_NETC_C_UNW", "ML_NETC_C_IT1", "ML_NETC_C_NM1", "ML_NETC_C_UN1",
  "ML_NETW_W_ANY", "ML_NETW_W_ITN", "ML_NETW_W_LLN", "ML_NETW_W_ITI", "ML_NETW_W_NUM", "ML_NETW_W_UNW", "ML_NETW_W_IT1", "ML_NETW_W_NM1", "ML_NETW_W_UN1",
  "ML_IPTP_W_SPF", "ML_IPTP_W_2SP", "ML_IPTP_W_3SP", "ML_IPTP_W_SPA", "ML_IPTP_W_2SA", "ML_IPTP_W_3SA", "ML_IPTP_W_NUM", "ML_IPTP_W_UNW",
  "ML_FEVR_C_FEV", "ML_FEVR_C_NUM", "ML_FEVR_C_UNW",
  "ML_FEVT_C_ADV", "ML_FEVT_C_BLD", "ML_FEVT_C_ACT", "ML_FEVT_C_ACS", "ML_FEVT_C_AML", "ML_FEVT_C_AMS", "ML_FEVT_C_NUM", "ML_FEVT_C_UNW",
  "ML_AMLD_C_ACT", "ML_AMLD_C_QNN", "ML_AMLD_C_SPF", "ML_AMLD_C_CHL", "ML_AMLD_C_AMQ", "ML_AMLD_C_NUM", "ML_AMLD_C_OAM", "ML_AMLD_C_UNW",
  "ML_CMLT_C_ANM", "ML_CMLT_C_RDT", "ML_CMLT_C_MSY", "ML_CMLT_C_NUM",
  "ML_HEMO_C_HL8", "ML_HEMO_C_NUM", "ML_HEMO_C_UNW",
  "ML_PMAL_C_RDT", "ML_PMAL_C_RDE", "ML_PMAL_C_RDR", "ML_PMAL_C_RDL", "ML_PMAL_C_RDU", "ML_PMAL_C_NMR", "ML_PMAL_C_UNR", "ML_PMAL_C_UER", "ML_PMAL_C_MSY", "ML_PMAL_C_MSE", "ML_PMAL_C_MSR", "ML_PMAL_C_MSL", "ML_PMAL_C_MSU", "ML_PMAL_C_NMM", "ML_PMAL_C_UNM", "ML_PMAL_C_UEM",
  "ML_NSRC_N_MDC", "ML_NSRC_N_ANC", "ML_NSRC_N_IMM", "ML_NSRC_N_GHF", "ML_NSRC_N_PHF", "ML_NSRC_N_PHM", "ML_NSRC_N_SHP", "ML_NSRC_N_CHW", "ML_NSRC_N_REL", "ML_NSRC_N_SCL", "ML_NSRC_N_OTH", "ML_NSRC_N_DKM", "ML_NSRC_N_TOT", "ML_NSRC_N_NUM",
  "ML_FEVT_C_ADS", "ML_AMLD_C_ART"
)

indicatorIds <- malaria_indicators # An empty string '' will query ALL indicators (probably very slow)
# Query the data endpoint once per (survey, indicator) pair and keep every
# non-empty response. breakdown and end_url are the values set earlier in
# this document.
base_url <- "http://api.dhsprogram.com/rest/dhs/data?f=json"

# Responses are collected in a preallocated list and bound once after the
# loop, instead of growing a data frame with rbind on every iteration.
results <- vector("list", length(surveyIds) * length(indicatorIds))
counter <- 0 # number of non-empty responses stored so far

for (survey in surveyIds) {
  for (indicator in indicatorIds) {
    ### Build Query ###
    url <- paste0(
      base_url,
      "&surveyIds=", survey,
      "&breakdown=", breakdown,
      "&indicatorIds=", indicator,
      end_url
    )

    ### Call API ###
    indicators_temp <- fromJSON(url) # get page

    # Base-R emptiness check: is_empty() is not provided by any of the
    # packages attached at the top of this document.
    if (length(indicators_temp$Data) > 0) {
      counter <- counter + 1
      results[[counter]] <- indicators_temp$Data
    }
  }
}

# Stack all stored responses; yields an empty data frame if nothing was found.
indicators <- do.call(rbind.data.frame, results[seq_len(counter)])
### Inspect Available Data ###
#summary(dhs_data)
# Show the key columns in an interactive table with a filter row above the
# columns and 20 rows per page.
indicators %>%
  select(
    SurveyId, IndicatorId, Indicator, IsPreferred, Value,
    CharacteristicLabel, ByVariableLabel
  ) %>%
  datatable(filter = "top", options = list(pageLength = 20, dom = "ftlip"))